## "Where are the missing dead" replication
##
## Reproduce: The Parallel trend plot & Event Study Plot 
##
## 			  - Figure 6
##			  - Appendix Figure A3, A7
##
## 
## Input:     main.dta

## Guoer Liu
## 7/30/2022 


setwd("~/Dropbox/Missing dead/replication/")


library(haven)
library(data.table) ## For some minor data wrangling
library(fixest)     ## NB: Requires version >=0.9.0
library(scales)
library(extrafont) ## change fonts in plot


### Function ###


dataCollapse <- function(vector, year.v, treat.ind){
	# Args:
	#	vector: 	n x 1 vector to be collapsed by province-treat
	#	year.v: 	n x 1 vector
	#   treat.ind:  n x 1 vector of treatment indicator
	out <- tapply(vector, 
				  list(year.v, treat.ind), 
				  function(x) mean(x, na.rm = T))
	return(out)
}



paraPlot <- function(collapMat, 
					matrix.x.lab = NULL, matrix.title = NULL,
					l.mar = 3.5,
					hline = NULL,
					x.axis.vec = NULL,
					y.lab = TRUE, y.lab.names = NULL,
					y.min = NULL, y.max = NULL,
					legend = TRUE, leg.pos = NULL){
	# Args:
	#	collapMat:      n x 2 matrix (column 1: control; column 2: treat)
	#   matrix.x.lab:   a string of figure x axis label
	#	matrix.title:	a string of figure title
	#	hline:			a scalar indicate the position of vertical line
	#	x.axis.vec:		a length n string vector that shows x axis
	#   y.lab:			logical parameter to turn off y label
	#   y.lab.names:    a string of y axis lable
	#   y.min:			a scalar indicates the minimum value on y axis
	#   y.max:			a scalar indicates the maximum value on y axis
	#   legend:			logical parameter to turn off legend
	#	leg.pos:		a string indicates legend position
	pch.cex <- 1.6
	ctl.v <- collapMat[, 1]
	tr.v <- collapMat[, 2]
	#y.max <- max(x)
	#y.min <- min(x)
	x.len <- nrow(collapMat) 

	par(mar= c(6, l.mar, 3, 1))
	plot(NA, xlim = c(0.5, x.len), 
	         ylim = c(y.min, y.max), 
	         type = "n", 
	         xlab = matrix.x.lab,
	         main = matrix.title, 
	         ylab = "", axes = F, cex.main = 1.5, cex.lab = 1.3)
	grid(nx = NULL, ny = NULL,
     	 lty = 2,      # Grid line type
     	 col = "gray", # Grid line color
     	 lwd = 0.6)
	abline(v = hline, col = alpha("orangered", 0.8), lwd = 1.5)
	lines(ctl.v, lty = 2, lwd = 1.5)
	points(ctl.v, cex = pch.cex, pch = 1, col = "black")
	lines(tr.v, lty = 1, lwd = 1.5)
	points(tr.v, cex = pch.cex, pch = 19, col = "black", bg = "black")
	if (y.min < 1){
		text(seq_len(x.len)+0.2, -0.4, labels = x.axis.vec, srt = 0, adj = 1, 
			xpd = TRUE)
	} else {
		text(seq_len(x.len)+0.2, 0.93, labels = x.axis.vec, srt = 0, adj = 1, 
			xpd = TRUE)
	}
	axis(1, at = seq(1, x.len, 1), label = rep("", x.len), tick = TRUE)
	#	 cex.axis = 1, las = 1)
	if (y.lab == TRUE){
		axis(2, at = seq(ceiling(y.min), floor(y.max), 1), 
			label = seq(ceiling(y.min), floor(y.max), 1),
			cex.axis = 1, las = 1)
		title(ylab = y.lab.names, line = 2, cex.lab = 1)
	}
	if (legend == TRUE){
		legend(leg.pos, title = NULL, 
			   legend = c("Treated", "Control"), 
			   col = "black", 
			   lty = c(1, 2), cex = 0.8,
			   pch = c(19, 1),
			   lwd = c(1, 1))
	}
}


mydata <- read_dta("main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)

outcome <- c("deathsum_py_lg", "totcount_py_lg",
			 "traf_deathsum_py_lg", "traf_totcount_py_lg",
			 "othr_deathsum_py_lg", "othr_totcount_py_lg")

outcome.raw <- c("deathsum_py", "totcount_py",
			 	 "traf_deathsum_py", "traf_totcount_py",
			 	 "othr_deathsum_py", "othr_totcount_py")

cov.names <- c("log_pop2", "rural_pop_pct",
			   "log_light", "log_total_tax_rev_pc",
			   "log_coal_produ0", "log_mine_indwage",
			   "log_traf_indwage",
			   "sec_tty", "sec_age", 
			   "sec_promotion", "gov_tty", 
			   "gov_age", "gov_promotion")

t.var <- c("did", "treat_time", "treat_group")


mydata$treated_stamp <- ifelse(mydata$treat_group == 1, 2004, NA)
mydata$time_to_treat <- ifelse(mydata$treat_group == 1, mydata$year - 2004, 0)
mydata$year_treated <- ifelse(mydata$treat_group == 0, 10000, 
							  mydata$treated_stamp) ## give the control units a
						  ## fake "treatment" date far outsite the study period
						  ## following Sun and Abraham (2000)



rhs <- paste('~ i(time_to_treat, treat_group, ref = -1) +', 
				paste(cov.names, collapse = " + "), "| prov_id + year")

rhs.sa <- paste('~ sunab(year_treated, year) +', 
				paste(cov.names, collapse = " + "), "| prov_id + year")

rhs.list <- list(rhs, rhs.sa)


mod.out <- list()

mod.out.raw <- list()


## log transformed outcome variable ##

for(i in outcome){
	for(j in 1:length(rhs.list)){
		formula <- as.formula(paste(i, rhs.list[[j]]))
		mod <- feols(formula, cluster = ~prov_id, data = mydata)
		mod.out <- append(mod.out, list(mod))

	}
}


## raw outcome variable ##

for(i in outcome.raw){
	for(j in 1:length(rhs.list)){
		formula <- as.formula(paste(i, rhs.list[[j]]))
		mod <- feols(formula, cluster = ~prov_id, data = mydata)
		mod.out.raw <- append(mod.out.raw, list(mod))
	}
}


## Plot: Event study plot (log transformed)

names(mod.out) <- paste(rep(c("Aggregate", "Traffic", "Others"), each = 4),
					    rep(c("(Deaths)", "(Acc. Counts)"), each = 2))


#pdf("f6_event.pdf", 
#	family = "Times New Roman", width = 8, height = 5)
par(mfcol = c(2, 3), mgp=c(1, 1, 0), mar= c(2,4,3,1))
for(i in seq(1, 12, 2)){
	iplot(list(mod.out[[i]], mod.out[[i+1]]), sep = 0.3, ref.line = -1,
      	  xlab = "Time to treatment",
          main = names(mod.out)[i])
}
	legend("topright", col = c(1, 2), pch = c(20, 17), 
       legend = c("TWFE", "Sun & Abraham (2020)"))
#dev.off()

## Plot: Event study plot (raw data)

names(mod.out.raw) <- paste(rep(c("Raw Aggregate", "Raw Traffic",
								  "Raw Others"), each = 4),
					    	rep(c("(Deaths)", "(Acc. Counts)"), each = 2))

#pdf("fA7_eventRaw.pdf", 
#	family = "Times New Roman", width = 8, height = 5)
par(mfcol = c(2, 3), mgp=c(1, 1, 0), mar= c(2,4,3,1))
for(i in seq(1, 12, 2)){
	iplot(list(mod.out.raw[[i]], mod.out.raw[[i+1]]), sep = 0.3, ref.line = -1,
      	  xlab = "Time to treatment",
          main = names(mod.out.raw)[i])
}
	legend("topright", col = c(1, 2), pch = c(20, 17), 
       legend = c("TWFE", "Sun & Abraham (2020)"))
#dev.off()


### Plot: parallel trend checks ###


deathsum_pylg_tr <- dataCollapse(mydata$deathsum_pylg_tr, 
								 mydata$year, mydata$treat_group)
totcount_pylg_tr <- dataCollapse(mydata$totcount_pylg_tr, 
								 mydata$year, mydata$treat_group)
traf_deathsum_pylg_tr <- dataCollapse(mydata$traf_deathsum_pylg_tr, 
									  mydata$year, mydata$treat_group)
traf_totcount_pylg_tr <- dataCollapse(mydata$traf_totcount_pylg_tr, 
									  mydata$year, mydata$treat_group)
othr_deathsum_pylg_tr <- dataCollapse(mydata$othr_deathsum_pylg_tr, 
									  mydata$year, mydata$treat_group)
othr_totcount_pylg_tr <- dataCollapse(mydata$othr_totcount_pylg_tr, 
									  mydata$year, mydata$treat_group)

#pdf("fA3_para_check.pdf", family = "Times New Roman", width = 10, height = 6.18)
par(mfrow = c(2, 3), mgp=c(3, 1, 0))
paraPlot(deathsum_pylg_tr, "Time", "Aggregate",
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = TRUE, y.lab.names = "Log (Deaths)",
		y.min = 0.5, y.max = 6.5,
		legend = FALSE, leg.pos = "bottomright")
paraPlot(traf_deathsum_pylg_tr, "Time", "Traffic", l.mar = 1.5,
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = FALSE,
		y.min = 0.5, y.max = 6.5,
		legend = FALSE, leg.pos = "bottomright")
paraPlot(othr_deathsum_pylg_tr, "Time", "Others", l.mar = 1.5,
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = FALSE,
		y.min = 0.5, y.max = 6.5,
		legend = TRUE, leg.pos = "topright")
paraPlot(totcount_pylg_tr, "Acc. Counts", "Aggregate",
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = TRUE, y.lab.names = "Log (Acc. counts)",
		y.min = 0.5, y.max = 6.5,
		legend = FALSE, leg.pos = "bottomright")
paraPlot(traf_totcount_pylg_tr, "Acc. Counts", "Traffic", l.mar = 1.5,
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = FALSE,
		y.min = 0.5, y.max = 6.5,
		legend = FALSE, leg.pos = "bottomright")
paraPlot(othr_totcount_pylg_tr, "Acc. Counts", "Others", l.mar = 1.5,
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = FALSE,
		y.min = 0.5, y.max = 6.5,
		legend = TRUE, leg.pos = "topright")
#dev.off()

























